{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "2376c59b",
   "metadata": {},
   "source": [
    "### 协方差散度矩阵"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "4d071160",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2022-06-15T12:29:01.337369Z",
     "start_time": "2022-06-15T12:29:01.137425Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 2.33333333, -6.83333333, -5.        ],\n",
       "       [-6.83333333, 24.33333333, 17.        ],\n",
       "       [-5.        , 17.        , 12.        ]])"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import numpy as np\n",
    "A = np.random.randint(0,10,size = (3,3))\n",
    "# 协方差\n",
    "cov = np.cov(A,rowvar=True)\n",
    "cov"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "99abbd42",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2022-06-15T12:32:45.507897Z",
     "start_time": "2022-06-15T12:32:45.498929Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[4, 1, 2],\n",
       "       [0, 8, 9],\n",
       "       [0, 6, 6]])"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "A"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "7399a025",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2022-06-15T12:34:37.833520Z",
     "start_time": "2022-06-15T12:34:37.819531Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[2.33333333, 2.33333333],\n",
       "       [2.33333333, 2.33333333]])"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.cov(A[0],A[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "d4cce888",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2022-06-15T12:35:58.559501Z",
     "start_time": "2022-06-15T12:35:58.545523Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2.3333333333333335"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "((A[0] - A[0].mean())**2).sum()/(3-1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "c04de8d0",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2022-06-15T12:30:59.618939Z",
     "start_time": "2022-06-15T12:30:59.604796Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 2.33333333, -6.83333333, -5.        ],\n",
       "       [-6.83333333, 24.33333333, 17.        ],\n",
       "       [-5.        , 17.        , 12.        ]])"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# 散度矩阵\n",
    "B = (A - A.mean(axis = 1).reshape(-1,1))\n",
    "scatter = B.dot(B.T)\n",
    "display(scatter/(3 - 1))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "388199ac",
   "metadata": {},
   "source": [
    "### 特征值和特征向量"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "f13e35fd",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2022-06-15T12:45:01.273811Z",
     "start_time": "2022-06-15T12:45:01.262326Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[8, 2, 8],\n",
       "       [3, 2, 5],\n",
       "       [9, 8, 3]])"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "A = np.random.randint(0,10,size = (3,3))\n",
    "A"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "6b416425",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2022-06-15T12:45:04.980477Z",
     "start_time": "2022-06-15T12:45:04.971957Z"
    },
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([16.75565381,  1.67151088, -5.4271647 ])"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "array([[-0.67330165, -0.6512756 , -0.43197073],\n",
       "       [-0.35639922,  0.67516   , -0.37706241],\n",
       "       [-0.64779972,  0.34640881,  0.81928336]])"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "w,v = np.linalg.eig(A)\n",
    "display(w,v)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d795f891",
   "metadata": {},
   "source": [
    "<font size = 5>$Av = \\lambda v$</font>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "3f2e7aef",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2022-06-15T12:46:04.778610Z",
     "start_time": "2022-06-15T12:46:04.759899Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([-11.28160943,  -5.971702  , -10.85430783])"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "A.dot(v[:,0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "bf681dcb",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2022-06-15T12:46:22.039125Z",
     "start_time": "2022-06-15T12:46:22.021648Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([-11.28160943,  -5.971702  , -10.85430783])"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "w[0] * v[:,0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "11afb8ba",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2022-06-15T12:49:44.941438Z",
     "start_time": "2022-06-15T12:49:44.931444Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[8., 2., 8.],\n",
       "       [3., 2., 5.],\n",
       "       [9., 8., 3.]])"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "v.dot(np.diag(w)).dot(np.linalg.inv(v))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "1bd14a2d",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2022-06-15T12:50:01.844485Z",
     "start_time": "2022-06-15T12:50:01.836507Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[8, 2, 8],\n",
       "       [3, 2, 5],\n",
       "       [9, 8, 3]])"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "A"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8b16bf4b",
   "metadata": {},
   "source": [
    "### PCA降维"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "c87c1798",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2022-06-15T12:53:59.090036Z",
     "start_time": "2022-06-15T12:53:58.962364Z"
    }
   },
   "outputs": [],
   "source": [
    "from sklearn import datasets\n",
    "from sklearn.decomposition import PCA"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "6af51605",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2022-06-15T12:54:18.849951Z",
     "start_time": "2022-06-15T12:54:18.832330Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[5.1, 3.5, 1.4, 0.2],\n",
       "       [4.9, 3. , 1.4, 0.2],\n",
       "       [4.7, 3.2, 1.3, 0.2],\n",
       "       [4.6, 3.1, 1.5, 0.2],\n",
       "       [5. , 3.6, 1.4, 0.2]])"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "X,y = datasets.load_iris(return_X_y=True)\n",
    "display(X[:5])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "id": "2f2346e7",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2022-06-15T13:26:38.969998Z",
     "start_time": "2022-06-15T13:26:38.959818Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[-1.30533786,  0.64836932],\n",
       "       [-1.31993521, -0.35930856],\n",
       "       [-1.40496732, -0.29424412],\n",
       "       [-1.33510889, -0.64613986],\n",
       "       [-1.32702321,  0.6633044 ]])"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "pca = PCA(n_components=0.95,whiten=True) # 筛选特征重要性99%特征\n",
    "X_pca = pca.fit_transform(X)\n",
    "display(X_pca[:5])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4a72a1a6",
   "metadata": {},
   "source": [
    "### 自己写代码实现PCA降维【特征分解】"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "id": "d036d57d",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2022-06-15T13:32:34.954057Z",
     "start_time": "2022-06-15T13:32:34.927674Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[-1.30533786,  0.64836932],\n",
       "       [-1.31993521, -0.35930856],\n",
       "       [-1.40496732, -0.29424412],\n",
       "       [-1.33510889, -0.64613986],\n",
       "       [-1.32702321,  0.6633044 ]])"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 1、去中心化\n",
    "B = X - X.mean(axis = 0)\n",
    "\n",
    "# 2、协方差\n",
    "V = np.cov(B,rowvar=False,bias = True) # 以列来进行计算的，列是特征\n",
    "\n",
    "# 3、特征值和特征向量\n",
    "w,v = np.linalg.eig(V) # 特征值，从大到小排列\n",
    "# display(w,v)\n",
    "\n",
    "# 符号翻转，绝对值最大的，如果是负数，才翻转\n",
    "max_abs_cols = np.argmax(np.abs(v),axis = 0)\n",
    "signs = np.sign(v[max_abs_cols,[0,1,2,3]]) # 检索\n",
    "v *= signs # 根据条件进行翻转\n",
    "\n",
    "# 4、特征的筛选\n",
    "cond = (w/w.sum()).cumsum() >= 0.95\n",
    "index = cond.argmax()\n",
    "v_ = v[:,:index + 1] # 特征向量筛选\n",
    "\n",
    "# 5、举证运算\n",
    "pca_result = B.dot(v_)\n",
    "pca_result = (pca_result - pca_result.mean(axis =0))/pca_result.std(axis=0,ddof = 1)\n",
    "pca_result[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "f8123196",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2022-06-15T13:30:54.676071Z",
     "start_time": "2022-06-15T13:30:54.665147Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 0.85667061, -0.73016143,  0.59791083,  0.75365743])"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "max_abs_cols = np.argmax(np.abs(v),axis = 0)\n",
    "v[max_abs_cols,[0,1,2,3]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "id": "da9bcf0d",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2022-06-15T13:31:53.865098Z",
     "start_time": "2022-06-15T13:31:53.855133Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 1., -1.,  1.,  1.])"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "max_abs_cols = np.argmax(np.abs(v),axis = 0)\n",
    "signs = np.sign(v[max_abs_cols,[0,1,2,3]]) # 检索\n",
    "signs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "id": "b5fea358",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2022-06-15T13:27:02.506950Z",
     "start_time": "2022-06-15T13:27:02.494835Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 0.36138659, -0.65658877, -0.58202985,  0.31548719],\n",
       "       [-0.08452251, -0.73016143,  0.59791083, -0.3197231 ],\n",
       "       [ 0.85667061,  0.17337266,  0.07623608, -0.47983899],\n",
       "       [ 0.3582892 ,  0.07548102,  0.54583143,  0.75365743]])"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "v"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "id": "325774f2",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2022-06-15T13:33:33.751749Z",
     "start_time": "2022-06-15T13:33:33.730484Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0.92461872, 0.05306648, 0.01710261, 0.00521218])"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "w/w.sum() # 计算每个特征值的权重，百分比"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "id": "3d63ba74",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2022-06-15T13:33:56.980238Z",
     "start_time": "2022-06-15T13:33:56.960300Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0.92461872, 0.97768521, 0.99478782, 1.        ])"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "(w/w.sum()).cumsum() # 累加和"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "id": "985f62f2",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2022-06-15T13:34:49.550384Z",
     "start_time": "2022-06-15T13:34:49.540437Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([False,  True,  True,  True])"
      ]
     },
     "execution_count": 54,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "(w/w.sum()).cumsum() >= 0.95"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "id": "f0ac3d8c",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2022-06-15T13:35:35.025612Z",
     "start_time": "2022-06-15T13:35:35.012690Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1"
      ]
     },
     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cond.argmax() # 当第一个为True时，索引就会返回~"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "d30b258e",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2022-06-15T13:05:36.270545Z",
     "start_time": "2022-06-15T13:05:36.258575Z"
    },
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([False, False,  True,  True])"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "2"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cond = (w/w.sum()).cumsum() >= 0.99 # 第一个为True的位置，刚好满足条件\n",
    "display(cond)\n",
    "index = cond.argmax()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "5b7ed009",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2022-06-15T13:04:54.366406Z",
     "start_time": "2022-06-15T13:04:54.355523Z"
    },
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0.92461872, 0.97768521, 0.99478782, 1.        ])"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "(w/w.sum()).cumsum()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a7684c73",
   "metadata": {},
   "source": [
    "### SVD奇异值分解"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "id": "217f0355",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2022-06-15T13:40:47.915968Z",
     "start_time": "2022-06-15T13:40:47.896986Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[9, 6, 8],\n",
       "       [3, 4, 6],\n",
       "       [7, 0, 9],\n",
       "       [9, 5, 1],\n",
       "       [5, 7, 9]])"
      ]
     },
     "execution_count": 56,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "A = np.random.randint(0,10,size = (5,3))\n",
    "A"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a5dbab53",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 满秩矩阵，可以求逆矩阵"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "id": "db0c813c",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2022-06-15T13:40:56.945828Z",
     "start_time": "2022-06-15T13:40:56.425521Z"
    },
    "collapsed": true
   },
   "outputs": [
    {
     "ename": "LinAlgError",
     "evalue": "Last 2 dimensions of the array must be square",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mLinAlgError\u001b[0m                               Traceback (most recent call last)",
      "Input \u001b[1;32mIn [57]\u001b[0m, in \u001b[0;36m<cell line: 1>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlinalg\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43meig\u001b[49m\u001b[43m(\u001b[49m\u001b[43mA\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[1;32m<__array_function__ internals>:180\u001b[0m, in \u001b[0;36meig\u001b[1;34m(*args, **kwargs)\u001b[0m\n",
      "File \u001b[1;32md:\\soft\\python\\396\\lib\\site-packages\\numpy\\linalg\\linalg.py:1304\u001b[0m, in \u001b[0;36meig\u001b[1;34m(a)\u001b[0m\n\u001b[0;32m   1302\u001b[0m a, wrap \u001b[38;5;241m=\u001b[39m _makearray(a)\n\u001b[0;32m   1303\u001b[0m _assert_stacked_2d(a)\n\u001b[1;32m-> 1304\u001b[0m \u001b[43m_assert_stacked_square\u001b[49m\u001b[43m(\u001b[49m\u001b[43ma\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m   1305\u001b[0m _assert_finite(a)\n\u001b[0;32m   1306\u001b[0m t, result_t \u001b[38;5;241m=\u001b[39m _commonType(a)\n",
      "File \u001b[1;32md:\\soft\\python\\396\\lib\\site-packages\\numpy\\linalg\\linalg.py:203\u001b[0m, in \u001b[0;36m_assert_stacked_square\u001b[1;34m(*arrays)\u001b[0m\n\u001b[0;32m    201\u001b[0m m, n \u001b[38;5;241m=\u001b[39m a\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m2\u001b[39m:]\n\u001b[0;32m    202\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m m \u001b[38;5;241m!=\u001b[39m n:\n\u001b[1;32m--> 203\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m LinAlgError(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mLast 2 dimensions of the array must be square\u001b[39m\u001b[38;5;124m'\u001b[39m)\n",
      "\u001b[1;31mLinAlgError\u001b[0m: Last 2 dimensions of the array must be square"
     ]
    }
   ],
   "source": [
    "# np.linalg.eig(A)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "id": "cb4c8220",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2022-06-15T13:45:44.367452Z",
     "start_time": "2022-06-15T13:45:44.343256Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[-0.56910866,  0.13144845,  0.00599043],\n",
       "       [-0.31746267, -0.21803493, -0.30393608],\n",
       "       [-0.43488654, -0.42249048,  0.75560588],\n",
       "       [-0.35852428,  0.83620844,  0.14020996],\n",
       "       [-0.50760893, -0.23966501, -0.56301679]])"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "array([23.58326429,  7.07114342,  5.27527971])"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "array([[-0.63109732, -0.42531799, -0.64870701],\n",
       "       [ 0.55140286,  0.34222726, -0.76081232],\n",
       "       [ 0.54559239, -0.83784552,  0.01854285]])"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "U,S,Vt = np.linalg.svd(A,full_matrices=False)\n",
    "display(U,S,Vt)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "id": "a0e3ac41",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2022-06-15T13:46:43.694595Z",
     "start_time": "2022-06-15T13:46:43.676117Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[9., 6., 8.],\n",
       "       [3., 4., 6.],\n",
       "       [7., 0., 9.],\n",
       "       [9., 5., 1.],\n",
       "       [5., 7., 9.]])"
      ]
     },
     "execution_count": 60,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 奇异值分解，是约等于\n",
    "U.dot(np.diag(S)).dot(Vt)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "id": "5e2f2f90",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2022-06-15T13:46:51.337996Z",
     "start_time": "2022-06-15T13:46:51.319377Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[9, 6, 8],\n",
       "       [3, 4, 6],\n",
       "       [7, 0, 9],\n",
       "       [9, 5, 1],\n",
       "       [5, 7, 9]])"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "A"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e89ab43f",
   "metadata": {},
   "source": [
    "### SVD奇异值分解-PCA降维"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "id": "301b0f7f",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2022-06-15T14:02:34.406536Z",
     "start_time": "2022-06-15T14:02:34.394039Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[-1.30533786],\n",
       "       [-1.31993521],\n",
       "       [-1.40496732],\n",
       "       [-1.33510889],\n",
       "       [-1.32702321]])"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from sklearn import datasets\n",
    "from sklearn.decomposition import PCA\n",
    "X,y = datasets.load_iris(return_X_y=True)\n",
    "pca = PCA(n_components=0.90,whiten=True) # 筛选特征重要性99%特征\n",
    "X_pca = pca.fit_transform(X)\n",
    "display(X_pca[:5])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d0757821",
   "metadata": {},
   "source": [
    "#### SVD分解"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "id": "1bc919db",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2022-06-15T14:02:14.317290Z",
     "start_time": "2022-06-15T14:02:14.294367Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[-1.30533786,  0.64836932, -0.09981716],\n",
       "       [-1.31993521, -0.35930856, -0.75257299],\n",
       "       [-1.40496732, -0.29424412,  0.0640073 ],\n",
       "       [-1.33510889, -0.64613986,  0.11284924],\n",
       "       [-1.32702321,  0.6633044 ,  0.32210314]])"
      ]
     },
     "execution_count": 75,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "n_components = 2\n",
    "# 1、去中心化\n",
    "B = X - X.mean(axis = 0)\n",
    "\n",
    "# 2、奇异值分解\n",
    "U,S,Vt = np.linalg.svd(B,full_matrices=False)\n",
    "\n",
    "# 3、符号翻转\n",
    "# 符号翻转，绝对值最大的，如果是负数，才翻转\n",
    "max_abs_cols = np.argmax(np.abs(U),axis = 0)\n",
    "signs = np.sign(U[max_abs_cols,[0,1,2,3]]) # 检索\n",
    "U *= signs # 根据条件进行翻转\n",
    "\n",
    "# 4、降维特征筛选\n",
    "# U = U[:,:n_components]\n",
    "cond = (S/S.sum()).cumsum() > 0.90\n",
    "index = cond.argmax()\n",
    "U = U[:,:index + 1]\n",
    "\n",
    "# 归一化\n",
    "U = (U - U.mean(axis = 0))/(U.std(axis = 0,ddof = 1))\n",
    "U[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "id": "6b4347b3",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2022-06-15T13:57:06.610235Z",
     "start_time": "2022-06-15T13:57:06.592771Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[-1.30533786,  0.64836932],\n",
       "       [-1.31993521, -0.35930856],\n",
       "       [-1.40496732, -0.29424412],\n",
       "       [-1.33510889, -0.64613986],\n",
       "       [-1.32702321,  0.6633044 ]])"
      ]
     },
     "execution_count": 71,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_pca[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "id": "cac5dd30",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2022-06-15T13:59:47.898987Z",
     "start_time": "2022-06-15T13:59:47.885987Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([25.09996044,  6.01314738,  3.41368064,  1.88452351])"
      ]
     },
     "execution_count": 74,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "S # 奇异值，如果根据比例进行筛选，那么得到结论和PCA，筛选的特征，就会不完全一样【接受】"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "id": "7139dd72",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2022-06-15T14:05:30.144622Z",
     "start_time": "2022-06-15T14:05:30.129583Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[9, 6, 8],\n",
       "       [3, 4, 6],\n",
       "       [7, 0, 9],\n",
       "       [9, 5, 1],\n",
       "       [5, 7, 9]])"
      ]
     },
     "execution_count": 77,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "A"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "id": "321c1492",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2022-06-15T14:05:34.671450Z",
     "start_time": "2022-06-15T14:05:34.652512Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(5, 3)"
      ]
     },
     "execution_count": 78,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "A.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "62dd6de7",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.6"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {
    "height": "calc(100% - 180px)",
    "left": "10px",
    "top": "150px",
    "width": "245.76px"
   },
   "toc_section_display": true,
   "toc_window_display": true
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
